
clear all
set more off

** Change path to re-run on another computer
cd ""
**

*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
***1) GDP deflator (from WDI)
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
insheet using "Other data/GDP deflator/update2017_GDP_deflator_data.csv", nodouble comma clear

drop v1 v2

drop in 1

rename * deflator#, renumber(1958)
rename (deflator1958 deflator1959) (country isoalpha3code)

compress
drop if country==""

reshape long deflator, i(country) j(year)

destring deflator, replace float force

drop if year>=2016
replace deflator=deflator/100

replace isoalpha3code="ROU" if isoalpha3code=="ROM"

label var deflator "GDP deflator (base year varies by country)"

order country iso

**Add Taiwan from their national statistics database https://eng.stat.gov.tw/:
replace country="Taiwan, Republic of China" if country=="Chinese Taipei"
merge 1:1 country year using "nonOECD_otherpricedata/Taiwan/Taiwan_ER_deflator", update
drop _m exch

save "Other data/GDP deflator/GDP_deflator", replace

*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
***2a) Exchange rate data
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

insheet using "Other data/Exchange rates/update2017_Exchange_rates_WDI.csv", nodouble comma clear

drop v1 v2

drop in 1

rename * exchange_rate#, renumber(1958)
rename (exchange_rate1958 exchange_rate1959) (country isoalpha3code)
drop if country==""
compress
reshape long exchange_rate, i(country) j(year)

destring exchange_rate, replace float force

replace isoalpha3code="ROU" if isoalpha3code=="ROM"

label var exchange_rate "Official exchange rate (LCU per US$, period average)"

//express euro countries currencies in German Euro, French Euro etc until common Euro
gen conversion_euro=.
replace conversion_euro=40.3399 if iso=="BEL"
replace conversion_euro= 1.95583 if iso=="DEU"
replace conversion_euro= 166.386 if iso=="ESP"
replace conversion_euro= 6.55957 if iso=="FRA"
replace conversion_euro= 0.787564 if iso=="IRL"
replace conversion_euro= 1936.27 if iso=="ITA"
replace conversion_euro= 40.3399 if iso=="LUX"
replace conversion_euro= 2.20371 if iso=="NLD"
replace conversion_euro= 13.7603 if iso=="AUT"
replace conversion_euro= 200.482 if iso=="PRT"
replace conversion_euro= 5.94573 if iso=="FIN"
replace conversion_euro= 340.750 if iso=="GRC"
replace conversion_euro= 0.585274 if iso=="CYP"
replace conversion_euro= 15.6466 if iso=="EST"
replace conversion_euro= 0.702804 if iso=="LVA"
replace conversion_euro= 6.55957 if iso=="MCO"
replace conversion_euro= 0.4293 if iso=="MLT"
replace conversion_euro= 239.64 if iso=="SVN"
replace conversion_euro= 30.126 if iso=="SVK"

replace exchange_rate= exchange_rate/conversion_euro if conversion_euro!=.

gen temp=exchange_rate if iso=="EMU"
bysort year: egen euro=total(temp) if iso=="EMU" |  iso=="AUT" | iso=="BEL" | iso=="CYP" | iso=="EST" | iso=="FIN" | iso=="FRA" | iso=="DEU" | iso=="GRC" |  iso=="IRL" | iso=="ITA" | iso=="LVA" | iso=="LUX" | iso=="MCO" | iso=="MLT" | iso=="NLD" | iso=="PRT" | iso=="SVK" | iso=="SVN" | iso=="ESP", missing

replace exchange_rate=euro if exchange_rate==. & euro!=.

drop if year>=2016
drop conv temp euro

order country iso

**Add Taiwan from their national statistics database https://eng.stat.gov.tw/:
replace country="Taiwan, Republic of China" if country=="Chinese Taipei"
merge 1:1 country year using "nonOECD_otherpricedata/Taiwan/Taiwan_ER_deflator", update
drop _m deflator

save "Other data/Exchange rates/exchange_rates.dta", replace

*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
***2b) Create PPP conversion factor
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

insheet using "Other data/PPP conversion factor/update2017_PA.NUS.PPP.indicator.csv", nodouble comma clear

drop in 1
drop v1 v2

rename * PPPfactor#, renumber(1956)
drop PPPfactor1958 PPPfactor1959
rename (PPPfactor1956 PPPfactor1957) (country isoalpha3code)

drop if country==""
compress

reshape long PPPfactor, i(country) j(year)

destring PPPfactor, replace float force

replace isoalpha3code="ROU" if isoalpha3code=="ROM"

label var PPPfactor "PPP conversion factor, GDP (LCU per international $)"

drop if year>=2016 

save "Other data/PPP conversion factor/PPPfactors.dta", replace


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
***3) Energy prices
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

*a) real price index
*****************************
insheet using "IEAEnergypriceindex/update2016_real_nom_index_industry CLEAN AUSTRIA.csv", nodouble comma clear
rename LOCATION location
rename time year
keep location country v4 v6 year value
rename value v


gen z1=lower(v4)
replace z1 = subinstr(z1," ","",.)
replace z1 = subinstr(z1,"automotivediesel","autodiesel",.)

gen z2=lower(v6)
replace z2 = subinstr(z2," ","",.)
replace z2 = subinstr(z2,"nominalindex","nomindex",.)
replace z2 = subinstr(z2,"forindustry","_industry",.)
gen z3=z1+"_"+z2

drop v4 v6 z1 z2 
reshape wide v, i(location country year) j(z3) string

drop if year>=2016
rename v* *
drop location

unique country

preserve
keep if cou=="OECD Americas" | cou=="OECD Asia and Oceania"  | cou=="OECD Europe"  | cou=="OECD Total" 
rename *industry *

replace cou="OECD_A" if cou=="OECD Americas" 
replace cou="OECD_AO" if cou=="OECD Asia and Oceania" 
replace cou="OECD_E" if cou=="OECD Europe" 
replace cou="OECD_T" if cou=="OECD Total" 

reshape wide *index_,i(year) j(cou) string

save "IEAEnergypriceindex/price_index_oecd", replace
//merge later after rectangularization
restore
drop if cou=="OECD Americas" | cou=="OECD Asia and Oceania"  | cou=="OECD Europe"  | cou=="OECD Total" 

save "IEAEnergypriceindex/price_index", replace

*b) Add wholesale price index for some non-OECD country
*****************************
insheet using "IEAEnergypriceindex/update2016_wholesale_index.csv", nodouble comma clear

rename time year
keep location country v4 year value

gen z1=lower(v4)
replace z1 = subinstr(z1," ","",.)
drop v4
rename value wi_
reshape wide wi_, i(location country year) j(z1) string

compress

drop location

unique country

* add wholesaleprice index from old file for non-OECD countries:
preserve
insheet using "IEAEnergypriceindex/whole_re_08761388122724.csv", nodouble comma clear
rename time year
drop if year==.
drop index
rename * wi_*
rename (wi_country wi_year) (country year)
destring wi_oilproducts wi_electricity wi_naturalgas wi_coal, replace force float
compress
tempfile oldwi
save `oldwi'
restore
preserve
merge 1:1 country year using `oldwi', update
assert _m!=4 // no missing in new which are in old
restore
merge 1:1 country year using `oldwi'
assert year>2011 if _m==1
// make sure that no country mixed indices because of base year:
replace _m=. if _m==3 // not replaced ones from new
replace _m=. if year<1993 // not replaced ones from new
bysort cou: egen mt=mean(_m)
*br if mt>1 & mt<2
// it is fine: just additional years with missing values in the older series
drop _m mt
unique country
// 41 countries

save "IEAEnergypriceindex/price_index_wholesale", replace


*c) end use prices INDUSTRY
*****************************
insheet using "IEAEnergypriceindex/update2016_end_toe_nat_curr_Industry.csv", nodouble comma clear

rename time year

keep location country v4 v8 year value

*only keep prices including tax, so total price:
keep if v8=="Total price (nat. cur./toe NCV)"
drop v8

gen z1=lower(v4)
replace z1 = subinstr(z1," ","",.)
drop v4

reshape wide value, i(location country year) j(z1) string
rename value* *

drop if year>=2016

drop location
fillin country year
drop _fill

*need to add the non-OECD series:
preserve
insheet using "IEAEnergypriceindex/end_toe_nat_curr_Industry.csv", nodouble comma clear
rename time year
drop if year==.
destring high-liq, replace force float
tempfile oldend
save `oldend'
restore

merge 1:1 country year using `oldend', update
tab year if _m==1
drop _m

********
*
gen mdummy=1
*
********

*Add indian gas price
*****************************
merge 1:1 country year using "nonOECD_otherpricedata/India/India_gas", update

replace mdummy=2 if _m==4 | _m==2

drop _m


save "IEAEnergypriceindex/Industry_total_price_toe", replace
// CONVERT INDIAN GAS PRICE TO NAT CURRENCY later

**d) end use prices ELECTRICITY GENERATION (to fill in some missing values)
*****************************
insheet using "IEAEnergypriceindex/update2016_end_toe_nat_cur_EG.csv", nodouble comma clear

rename time year
*only keep prices including tax, so total price:
keep if v8=="Total price (nat. cur./toe NCV)"

keep country year v4  value

gen z1=lower(v4)
replace z1 = subinstr(z1," ","",.)
drop v4

reshape wide value, i( country year) j(z1) string
rename value* *

drop if year>=2016
unique cou

*add non-oecd countries:
preserve
insheet using "IEAEnergypriceindex/end_toe_nat_cur_EG.csv", nodouble comma clear
rename * EG_*
rename (EG_time EG_country) (year country)
drop if year==.
destring EG_high-EG_lique, replace force float
compress
unique cou
tempfile oldeg
save `oldeg'
restore

merge 1:1 country year using `oldeg', update
tab year if _m==1
drop _m

save "IEAEnergypriceindex/electricity_generation_total_price_USDtoe.dta", replace


**e) Create dta file with total prices usd/toe industry & elec. gen.
*****************************
use  "IEAEnergypriceindex/Industry_total_price_toe.dta", clear
merge 1:1 country year using "IEAEnergypriceindex/electricity_generation_total_price_USDtoe"
drop _m
save "IEAEnergypriceindex/Ind.&EG_total_price_toe.dta", replace

**f) Fill in missin Mexico industry price for steam coal from electricity generation
*****************************
use  "IEAEnergypriceindex/Ind.&EG_total_price_toe.dta", clear

replace steamcoal=EG_steamcoal if country=="Mexico"
replace mdummy=2 if country=="Mexico" & EG_steamcoal!=.

*g) Fill in missin Indonisia industry price for nat gas with growth rate from nat gas electricity generation
*****************************
egen id=group(country)
xtset id year

gen gro_temp=(EG_naturalgas-L.EG_naturalgas)/L.EG_naturalgas if country=="Indonesia"

gen dtemp=naturalgas

forvalues i = 1/20{
replace naturalgas=L.naturalgas*(1+gro_temp) if country=="Indonesia" & naturalgas==.
}
replace mdummy=2 if dtemp!=naturalgas
drop gro_temp dtemp


*h) TAIWAN:  Take growth rate of low sulphur oil and apply to high sulphur price (highly correlated)
*****************************
gen gro_temp=(lowsulphur-L.lowsulphur)/L.lowsulphur if country=="Chinese Taipei"

gen dtemp= highsulphurfuel

forvalues i = 1/20{
replace highsulphurfueloil=F.highsulphurfueloil/(1+F.gro_temp) if country=="Chinese Taipei" & highsulphurfueloil==.
}

replace mdummy=2 if dtemp!= highsulphurfueloil

* take steam coal growth rate of elec gen and apply to industry steam coal price
drop gro_temp dtemp

gen gro_temp=(EG_steamcoal-L.EG_steamcoal)/L.EG_steamcoal if country=="Chinese Taipei"
forvalues i = 1/20{
replace steamcoal = L.steamcoal*(1+gro_temp) if country=="Chinese Taipei" & steamcoal ==.
}

drop id gro_temp EG*

save "IEAEnergypriceindex/Industry_total_price_toe.dta", replace


*i) BRAZIL: import data
*****************************
import excel "nonOECD_otherpricedata/Brazil/update2016_Brazil fuel data.xlsx", sheet("Sheet1") firstrow allstring clear

destring , replace force float

gen country="Brazil"
rename boe0146toe year

//bpe means barrel oil equivalent = 0.146 toe.


keep country year FUELOIL NATURALGASINDUSTRY INDUSTRIALELECTRICITY STEAMCOAL

drop if year<1978
foreach x in FUELOIL NATURALGASINDUSTRY INDUSTRIALELECTRICITY STEAMCOAL{
replace `x'=`x'/0.146
}

save "nonOECD_otherpricedata/Brazil/Brazil fuel data.dta" , replace

use  "IEAEnergypriceindex/Industry_total_price_toe", clear

merge 1:1 country year using "nonOECD_otherpricedata/Brazil/Brazil fuel data.dta"

drop if year<1978
drop if year==2016

replace highsulphurfueloil=FUELOIL if country=="Brazil"
replace naturalgas=NATURALGASINDUSTRY if country=="Brazil"
replace steamcoal=STEAMCOAL if country=="Brazil"
replace electricity=INDUSTRIALELECTRICITY if country=="Brazil"

replace mdummy=2 if country=="Brazil"

drop _m FUELOIL NATURALGASINDUSTRY INDUSTRIALELECTRICITY STEAMCOAL

save  "IEAEnergypriceindex/Industry_total_price_toe", replace
// NEED TO CONVERT BRAZILIAN PRICES TO NAT CURRENCY later


*h) South Africa prices
*****************************
// natural gas and electricity can take as such
// but oil and coal use as growth rates

import excel "nonOECD_otherpricedata/South Africa/update2016_SAenergyprices.xlsx",  firstrow  clear

rename * *new
rename year year

gen country="South Africa" 
merge 1:1 year country using "IEAEnergypriceindex/Industry_total_price_toe"
keep if cou=="South Africa" 

replace naturalgas=naturalgasnew if naturalgas==.
replace electricity=electricitynew if electricity==.

sort year
gen go=dieselnew/dieselnew[_n-1]
replace highsulphurfueloil=highsulphurfueloil[_n-1]*go if highsulphurfueloil==.

gen gs=steamcoalnew/steamcoalnew[_n-1]
replace steamcoal=steamcoal[_n-1]*gs if steamcoal==.

drop naturalgasnew steamcoalnew dieselnew electricitynew _merge go gs mdummy

tempfile sa
save `sa'

use "IEAEnergypriceindex/Industry_total_price_toe

merge 1:1 year country using `sa', update
replace mdummy=2 if _m==2
replace mdummy=2 if _m==4
tab mdummy

drop _m
sort country year
save  "IEAEnergypriceindex/Industry_total_price_toe", replace


*i) Add germany coal index and deflate the index to get real coal index
*****************************
 
import excel "nonOECD_otherpricedata/Germany coal/destatis/D 302-Steinkohle -Braunkohle Basis 2005=100.xls", sheet("forstata") firstrow  clear

gen country="Germany"
rename coal coal_nomindex_industry

save "nonOECD_otherpricedata/Germany coal/destatis/germany_coalindex.dta", replace


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
***4) merge end use prices, index, isocodes, deflator and exchange rates and PPP factors
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

use  "IEAEnergypriceindex/Industry_total_price_toe", clear


merge 1:1 country year using "IEAEnergypriceindex/price_index"
// non-OECD unfortunately only in end use prices and not in indicies
drop _m

merge 1:1 country year using "IEAEnergypriceindex/price_index_wholesale"
drop _m

replace country="Korea, Republic of" if country=="Korea"
replace country="Slovakia" if country=="Slovak Republic"
replace country="United States of America" if country=="United States"
replace country="Venezuela (Bolivarian Republic of)" if country=="Venezuela"
replace country="China" if country=="People's Republic of China"
replace country="Taiwan, Republic of China" if country=="Chinese Taipei"


** Rectengularize so that have missings for later nonOECD countries
tab cou
isid cou year 
fillin cou year
tab cou if _fillin==1
drop _fillin
*****

*now add in oecd avg index after recangularization
merge m:1  year using "IEAEnergypriceindex/price_index_oecd"
drop _m


merge m:1 country using "Country Isocodes/isocodes_country"
tab cou if _m==2
drop if _m==2  //some exotic non oecd countries

drop _m 

order iso*

merge m:1 isoalpha3code year using "Other data/GDP deflator/GDP_deflator"
drop if _m==2 // countries that are not needed
drop _m

merge m:1 isoalpha3code year using "Other data/Exchange rates/exchange_rates"
drop if _m==2 // countries that are not needed
drop _m


merge m:1 isoalpha3code year using "Other data/PPP conversion factor/PPPfactors.dta"
// no data on taiwan
drop if _m==2 // countries that are not needed
drop _m

merge 1:1 country year using "nonOECD_otherpricedata/Germany coal/destatis/germany_coalindex.dta", update
replace mdummy=2 if _m==2
replace mdummy=2 if _m==4
drop if _m==2
drop _m

compress

order deflator exch, after(year)


*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
***5) calculate series in constant USD
*$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$

rename (highsulphurfueloil lowsulphur lightfueloil naturalgas steamcoal cokingcoal electricity liquef totalenergy_realindex_industry oilproducts_realindex_industry /*
*/ electricity_realindex_industry naturalgas_realindex_industry coal_realindex_industry  lightfueloil_realindex_industry heavyfueloil_realindex_industry totalenergy_nomindex_industry oilproducts_nomindex_industry electricity_nomindex_industry naturalgas_nomindex_industry coal_nomindex_industry  lightfueloil_nomindex_industry heavyfueloil_nomindex_industry) /*
*/ (oil_hs oil_ls oil_light gas coal_steam coal_coke electricity lpgas                rex_totenergy rex_oil_tot rex_electricity rex_gas rex_coal  rex_oil_light rex_oil_heavy nox_totenergy nox_oil_tot nox_electricity                           nox_gas nox_coal  nox_oil_light nox_oil_heavy)


****
* need to convert indian gas price and all brazilian prices into NCU:

replace gas=gas*exchange_rate if country=="India" | country=="Brazil"

local fuel "oil_hs oil_ls oil_light coal_steam coal_coke electricity lpgas"
foreach x of local fuel{
replace `x'=`x'*exchange_rate if country=="Brazil"
}
****

*a) make deflator constant base year 2010:
********************
xtset isonum year, yearly
recast double deflator 
gen double growth_defl=(deflator-L.deflator)/L.deflator
gen double deflator2010=1 if year==2010 
assert deflator!=. if year==2010
order growth_defl deflator2010, after(deflator)

forvalues i = 1/10 {
	replace deflator2010=L.deflator2010*(1+growth_defl) if deflator2010==.
}
forvalues i = 1/50 {
	replace deflator2010=F.deflator2010/(1+F.growth_defl) if deflator2010==.
}

drop growth_defl

label var deflator2010 "GDP deflator (consistent base year 2010)"

assert abs(deflator2010-((1+(deflator-L.deflator)/L.deflator)*L.deflator2010))<=0.0000001  if deflator2010!=.  & L.deflator2010!=.



*b) convert real NCU to real USD using a scalar in 2010 (= deflator*nomER, but deflator=1) for all years:
// here the ER notation is LCU/USD
********************

gen temp = deflator2010/exchange_rate if year == 2010
bysort country: egen scalar=total(temp)

local fuel "oil_hs oil_ls oil_light gas coal_steam coal_coke electricity lpgas"

foreach f of local fuel {
gen `f'_NCU_real=`f'/deflator2010
gen `f'_consUS=`f'_NCU_real*scalar
order `f'_consUS `f'_NCU_real, after(`f')
}

drop scalar temp
sort country year

local fuel "oil_hs_consUS oil_ls_consUS oil_light_consUS gas_consUS coal_steam_consUS coal_coke_consUS electricity_consUS lpgas_consUS"
foreach f of local fuel {
label var `f' "Constant USD per toe"
}

local fuel "wi_oilproducts wi_electricity wi_naturalgas wi_coal"

foreach x of local fuel{
replace `x'=`x'/deflator2010
label var `x' "Real wholesale price index (different base years)"
}

*c) deflate nominal coal index for GERMANY
********************

*and need base year 2010
xtset isonum year, yearly
recast double nox_coal 
gen double growth_defl=(nox_coal-L.nox_coal)/L.nox_coal if country=="Germany"
replace  nox_coal=100 if year==2010 & nox_coal!=.

forvalues i = 1/10 {
	replace nox_coal=L.nox_coal*(1+growth_defl) if nox_coal==. & country=="Germany"
}
forvalues i = 1/50 {
	replace nox_coal=F.nox_coal/(1+F.growth_defl) if nox_coal==. & country=="Germany"
}

drop growth_defl

*get rex:
replace rex_coal=nox_coal/deflator2010 if country=="Germany"

recast float deflator deflator2010 rex_coal nox_coal

*d) deflate the PPP series
********************
gen temp = deflator2010/PPPfactor if year == 2010
bysort country: egen scalar=total(temp), missing

local fuel "oil_hs oil_ls oil_light gas coal_steam coal_coke electricity lpgas"

foreach f of local fuel {
gen `f'_PPPcons=`f'_NCU_real*scalar
order `f'_PPPcons, after(`f'_NCU_real)
}

drop scalar temp
sort country year

save "Energy_prices", replace


//save GDP deflator and exchange rate data in one file for other uses:

keep isoalpha2code isoalpha3code isonum country year deflator deflator2010 exchange_rate PPPfactor

save "Other data/ER_GDPdeflator.dta", replace

